Skim mercury data

Have a look at each of the variables

# Print a per-column overview of mercury_data, grouped by column type
# (missing counts, completeness, summary statistics).
# NOTE(review): skim() is presumably skimr::skim -- confirm in the setup chunk.
skim(mercury_data)
Data summary
Name mercury_data
Number of rows 513
Number of columns 20
_______________________
Column type frequency:
character 3
factor 5
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
URL 0 1.00 40 1777 0 79 0
Sex of Sample (if only one sample) 498 0.03 4 6 0 2 0
Main Findings/Comments 408 0.20 12 476 0 76 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Authors 0 1.00 FALSE 44 Rüd: 36, K. : 33, N M: 30, Dou: 29
Title 0 1.00 FALSE 44 A f: 36, Dis: 33, Mer: 30, A C: 29
Location Description 0 1.00 FALSE 152 Ion: 30, Lak: 20, Riv: 18, Moj: 16
Region 0 1.00 FALSE 10 Nor: 148, Asi: 90, Nor: 63, Sou: 48
Type of Fish 5 0.99 FALSE 192 Atl: 30, Pik: 23, Sna: 20, Rai: 17

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Published 0 1.00 2004.53 13.63 1975.00 1997.00 2007.00 2017.00 2020.00 ▃▁▅▃▇
Data collected (first) 47 0.91 1999.29 13.74 1972.00 1989.25 2000.50 2010.00 2017.00 ▃▂▃▃▇
Data collected (last) 248 0.52 2007.18 10.35 1973.00 2000.00 2011.00 2012.00 2017.00 ▁▁▂▁▇
Latitude 0 1.00 21.09 28.79 -41.83 1.44 25.08 45.27 71.04 ▃▆▇▇▇
Longitude 0 1.00 22.19 86.38 -116.63 -74.82 16.62 107.44 178.07 ▇▁▇▃▅
Number of Samples 261 0.49 11.82 38.80 1.00 1.00 5.00 11.00 547.00 ▇▁▁▁▁
Length of Fish (cm) 283 0.45 91.43 126.65 5.03 22.51 42.30 108.50 939.60 ▇▁▁▁▁
Weight of Fish (g) 264 0.49 29650.35 85129.48 2.00 184.00 1020.00 3550.00 540000.00 ▇▁▁▁▁
Age (years) 430 0.16 9.83 6.11 2.00 4.65 8.00 15.00 21.00 ▇▃▃▂▅
Conc Hg Fish [ug/g] 3 0.99 0.46 0.72 0.00 0.07 0.24 0.57 10.10 ▇▁▁▁▁
Conc Hg Water [ug/mL] 506 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.04 ▇▁▁▁▁
Conc Hg Sediment [ug/g] 456 0.11 0.35 0.71 0.00 0.01 0.06 0.14 2.63 ▇▁▁▁▁

How many of each kind of fish do we have?

Table

# Build the per-species summary table `fish_summary` and show it as an
# interactive DT table.
#
# Columns produced:
#   Type of Fish - ordered factor, levels sorted by descending Samples
#   Samples      - total number of samples per species
#   mean_conc    - mean of `Conc Hg Fish [ug/g]`, rounded to 3 decimals
#   sd_conc      - standard deviation of the same column, rounded to 3 decimals
#
# The outer parentheses make the assignment return its value, so the same
# tibble that is stored in `fish_summary` is piped on to DT::datatable().
(fish_summary <-
  mercury_data %>%
  # A row with no recorded sample count is counted as a single sample.
  mutate(`Number of Samples` = if_else(is.na(`Number of Samples`),
                                       1,
                                       `Number of Samples`)) %>%
  group_by(`Type of Fish`) %>%
  # na.rm = TRUE: a few rows have no fish concentration; without it a single
  # missing value turns the whole species' mean/sd into NA, which also pushes
  # that species to the end of any ordering based on mean_conc.
  summarise(Samples = sum(`Number of Samples`),
            mean_conc = round(mean(`Conc Hg Fish [ug/g]`, na.rm = TRUE), 3),
            sd_conc = round(sd(`Conc Hg Fish [ug/g]`, na.rm = TRUE), 3),
            .groups = "drop") %>%
  # Re-level the species factor by sample count so that as.numeric() on it
  # gives the species' rank (1 = most samples).
  mutate(`Type of Fish` = factor(x = `Type of Fish`,
                                 levels = `Type of Fish`[order(Samples,
                                                               decreasing = TRUE)],
                                 ordered = TRUE)) %>%
  arrange(desc(Samples))) %>%
  DT::datatable()

Bar graph (All)

# Static bar chart of total samples for every species; the x-axis follows the
# factor order of `Type of Fish` in fish_summary. Labels are rotated and
# shrunk so all species fit on the axis.
ggplot(fish_summary, aes(x = `Type of Fish`, y = Samples)) +
  geom_col() +
  theme(axis.text.x = element_text(size = 4, angle = 90, hjust = 1))

Bar graph (Top 15)

# Interactive bar chart limited to the first 15 levels of the `Type of Fish`
# factor (the integer code of the factor is used as the rank).
ggplotly(
  fish_summary %>%
    filter(as.numeric(`Type of Fish`) <= 15) %>%
    ggplot(aes(x = `Type of Fish`, y = Samples)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 1)),
  height = 500
)

Fish Size/Age/Weight

Fish Length

All

# Interactive histogram of all recorded fish lengths (cm), default binning.
ggplotly(
  ggplot(mercury_data, aes(x = `Length of Fish (cm)`)) +
    geom_histogram()
)

Small Fish Length

Length < 1m

# Length histogram restricted to fish shorter than 100 cm. filter() also
# drops rows whose length is missing.
ggplotly(
  mercury_data %>%
    filter(`Length of Fish (cm)` < 100) %>%
    ggplot(aes(x = `Length of Fish (cm)`)) +
    geom_histogram()
)

Fish Weight

All

# Interactive histogram of all recorded fish weights (g), default binning.
ggplotly(
  ggplot(mercury_data, aes(x = `Weight of Fish (g)`)) +
    geom_histogram()
)

Small Fish Weight

Weight < 10kg

# Weight histogram restricted to fish lighter than 10000 g (10 kg). filter()
# also drops rows whose weight is missing.
ggplotly(
  mercury_data %>%
    filter(`Weight of Fish (g)` < 10000) %>%
    ggplot(aes(x = `Weight of Fish (g)`)) +
    geom_histogram()
)

Fish Age

All

# Interactive histogram of all recorded fish ages (years), default binning.
ggplotly(
  ggplot(mercury_data, aes(x = `Age (years)`)) +
    geom_histogram()
)

Young Fish Age

Age < 10 years

# Age histogram restricted to fish younger than 10 years. filter() also drops
# rows whose age is missing.
ggplotly(
  mercury_data %>%
    filter(`Age (years)` < 10) %>%
    ggplot(aes(x = `Age (years)`)) +
    geom_histogram()
)

Fish Size/Weight

All

# Length vs. weight scatter, coloured by species, using only rows where
# length, weight and species are all present. The view is zoomed to lengths
# 0-300 cm (coord_cartesian zooms without discarding points) and the legend
# is hidden.
ggplotly(
  mercury_data %>%
    select(`Length of Fish (cm)`, `Weight of Fish (g)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Length of Fish (cm)`,
      y = `Weight of Fish (g)`,
      color = `Type of Fish`
    )) +
    geom_point() +
    coord_cartesian(xlim = c(0, 300)) +
    theme(legend.position = "none")
)

Big Fish size/weight (length > 1 m and weight > 10 kg, e.g. Tuna/Shark)

# Length vs. weight scatter for large fish only: complete rows with
# length > 100 cm and weight > 10000 g, coloured by species. The view is
# zoomed to lengths 0-300 cm and the legend is hidden.
#
# Disabled alternative that selected by species name instead of size:
#   filter(grepl("(tuna|shark)",
#                as.character(`Type of Fish`),
#                ignore.case = TRUE))
ggplotly(
  mercury_data %>%
    select(`Length of Fish (cm)`, `Weight of Fish (g)`, `Type of Fish`) %>%
    na.omit() %>%
    filter(
      `Length of Fish (cm)` > 100,
      `Weight of Fish (g)` > 10000
    ) %>%
    ggplot(aes(
      x = `Length of Fish (cm)`,
      y = `Weight of Fish (g)`,
      color = `Type of Fish`
    )) +
    geom_point() +
    coord_cartesian(xlim = c(0, 300)) +
    theme(legend.position = "none")
)

Small Fish size/weight

# Length vs. weight scatter for small fish only: complete rows with
# length <= 100 cm and weight <= 10000 g, coloured by species, legend hidden.
#
# Disabled alternative that excluded by species name instead of size:
#   filter(!grepl("(tuna|shark)",
#                 as.character(`Type of Fish`),
#                 ignore.case = TRUE))
ggplotly(
  mercury_data %>%
    select(`Length of Fish (cm)`, `Weight of Fish (g)`, `Type of Fish`) %>%
    na.omit() %>%
    filter(
      `Length of Fish (cm)` <= 100,
      `Weight of Fish (g)` <= 10000
    ) %>%
    ggplot(aes(
      x = `Length of Fish (cm)`,
      y = `Weight of Fish (g)`,
      color = `Type of Fish`
    )) +
    geom_point() +
    theme(legend.position = "none")
)

Fish age/weight

# Age vs. weight scatter, coloured by species, using only rows where age,
# weight and species are all present. Legend hidden.
ggplotly(
  mercury_data %>%
    select(`Age (years)`, `Weight of Fish (g)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Age (years)`,
      y = `Weight of Fish (g)`,
      color = `Type of Fish`
    )) +
    geom_point() +
    theme(legend.position = "none")
)

Fish age/length

# Age vs. length scatter, coloured by species, using only rows where age,
# length and species are all present. Legend hidden.
ggplotly(
  mercury_data %>%
    select(`Age (years)`, `Length of Fish (cm)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Age (years)`,
      y = `Length of Fish (cm)`,
      color = `Type of Fish`
    )) +
    geom_point() +
    theme(legend.position = "none")
)

Concentration of Mercury

Fish Conc

All

# Interactive histogram of mercury concentration in fish (ug/g). A red
# reference line is drawn at 0.5 ug/g, underneath the bars.
# NOTE(review): 0.5 is presumably a guideline threshold -- confirm the source.
ggplotly(
  ggplot(mercury_data, aes(x = `Conc Hg Fish [ug/g]`)) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_histogram()
)

Excluding highest

Everything below 4 ug/g

# Fish concentration histogram restricted to values below 4 ug/g (rows with
# a missing concentration are dropped by filter()). Red reference line at
# 0.5 ug/g, drawn underneath the bars.
ggplotly(
  mercury_data %>%
    filter(`Conc Hg Fish [ug/g]` < 4) %>%
    ggplot(aes(x = `Conc Hg Fish [ug/g]`)) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_histogram()
)

Sediment Conc

All

# Interactive histogram of mercury concentration in sediment (ug/g),
# default binning.
ggplotly(
  ggplot(mercury_data, aes(x = `Conc Hg Sediment [ug/g]`)) +
    geom_histogram()
)

Excluding highest

Everything below 1 ug/g

# Sediment concentration histogram restricted to values below 1 ug/g.
# filter() also drops rows whose sediment concentration is missing.
ggplotly(
  mercury_data %>%
    filter(`Conc Hg Sediment [ug/g]` < 1) %>%
    ggplot(aes(x = `Conc Hg Sediment [ug/g]`)) +
    geom_histogram()
)

Fish conc/length

# Fish mercury concentration (x) vs. fish length (y), coloured by species,
# using only rows where all three values are present. Red vertical reference
# line at 0.5 ug/g; legend hidden.
ggplotly(
  mercury_data %>%
    select(`Conc Hg Fish [ug/g]`, `Length of Fish (cm)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Conc Hg Fish [ug/g]`,
      y = `Length of Fish (cm)`,
      color = `Type of Fish`
    )) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_point() +
    theme(legend.position = "none")
)

Fish conc/weight

# Fish mercury concentration (x) vs. fish weight (y), coloured by species,
# using only rows where all three values are present. Red vertical reference
# line at 0.5 ug/g; legend hidden.
ggplotly(
  mercury_data %>%
    select(`Conc Hg Fish [ug/g]`, `Weight of Fish (g)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Conc Hg Fish [ug/g]`,
      y = `Weight of Fish (g)`,
      color = `Type of Fish`
    )) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_point() +
    theme(legend.position = "none")
)

Fish conc/Age

# Fish mercury concentration (x) vs. fish age (y), coloured by species,
# using only rows where all three values are present. Red vertical reference
# line at 0.5 ug/g; legend hidden.
ggplotly(
  mercury_data %>%
    select(`Conc Hg Fish [ug/g]`, `Age (years)`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Conc Hg Fish [ug/g]`,
      y = `Age (years)`,
      color = `Type of Fish`
    )) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_point() +
    theme(legend.position = "none")
)

Fish conc/Year of Publication

# Fish mercury concentration (x) vs. publication year (y), coloured by
# species, using only rows where all three values are present. Red vertical
# reference line at 0.5 ug/g; legend hidden.
ggplotly(
  mercury_data %>%
    select(`Conc Hg Fish [ug/g]`, Published, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Conc Hg Fish [ug/g]`,
      y = Published,
      color = `Type of Fish`
    )) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_point() +
    theme(legend.position = "none")
)

Fish conc/Sediment conc

# Fish mercury concentration (x) vs. sediment mercury concentration (y),
# coloured by species, using only rows where all three values are present.
# Red vertical reference line at 0.5 ug/g; legend hidden; axis padding
# removed with expand = FALSE.
ggplotly(
  mercury_data %>%
    select(`Conc Hg Fish [ug/g]`, `Conc Hg Sediment [ug/g]`, `Type of Fish`) %>%
    na.omit() %>%
    ggplot(aes(
      x = `Conc Hg Fish [ug/g]`,
      y = `Conc Hg Sediment [ug/g]`,
      color = `Type of Fish`
    )) +
    geom_vline(xintercept = 0.5, color = "red") +
    geom_point() +
    theme(legend.position = "none") +
    coord_cartesian(expand = FALSE)
)

Fish conc/Region

# Static boxplot of fish mercury concentration per Region, using rows where
# both values are present. Red horizontal reference line at 0.5 ug/g is drawn
# beneath the boxes; axis padding removed with expand = FALSE.
mercury_data %>%
  select(`Conc Hg Fish [ug/g]`, Region) %>%
  na.omit() %>%
  ggplot(aes(x = Region, y = `Conc Hg Fish [ug/g]`)) +
  geom_hline(yintercept = 0.5, color = "red") +
  geom_boxplot() +
  theme(legend.position = "none") +
  coord_cartesian(expand = FALSE)

Fish conc/Type of Fish

All

# Interactive boxplot of fish mercury concentration for every species.
# Species on the x-axis are ordered by descending mean concentration, taken
# from fish_summary$mean_conc (species whose mean is NA sort last). Rows
# missing either the concentration or the species are dropped. The tooltip
# shows the species name via the `text` aesthetic.
mercury_data %>%
  mutate(`Type of Fish` = factor(
    x = `Type of Fish`,
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    levels = fish_summary$`Type of Fish`[order(fish_summary$mean_conc,
                                               decreasing = TRUE)]
  )) %>%
  select(`Conc Hg Fish [ug/g]`,
         `Type of Fish`) %>%
  na.omit() %>%
  {ggplot(., aes(x = `Type of Fish`,
                 y = `Conc Hg Fish [ug/g]`,
                 text = `Type of Fish`)) +
      # Red reference line at 0.5 ug/g, drawn beneath the boxes.
      geom_hline(yintercept = 0.5, color = "red") +
      geom_boxplot() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6),
            legend.position = "none") +
      coord_cartesian(expand = FALSE)} %>%
  ggplotly(tooltip = "text")

Top 20

# Interactive boxplot of fish mercury concentration for the 20 species with
# the highest mean concentration. The species factor is re-levelled by
# descending fish_summary$mean_conc, so its integer code is the species'
# rank and `<= 20` keeps the top 20. Rows missing either the concentration
# or the species are dropped first. The tooltip shows the species name via
# the `text` aesthetic.
mercury_data %>%
  mutate(`Type of Fish` = factor(
    x = `Type of Fish`,
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    levels = fish_summary$`Type of Fish`[order(fish_summary$mean_conc,
                                               decreasing = TRUE)]
  )) %>%
  select(`Conc Hg Fish [ug/g]`,
         `Type of Fish`) %>%
  na.omit() %>%
  filter(as.numeric(`Type of Fish`) <= 20) %>%
  {ggplot(., aes(x = `Type of Fish`,
                 y = `Conc Hg Fish [ug/g]`,
                 text = `Type of Fish`)) +
      # Red reference line at 0.5 ug/g, drawn beneath the boxes.
      geom_hline(yintercept = 0.5, color = "red") +
      geom_boxplot() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1),
            legend.position = "none") +
      coord_cartesian(expand = FALSE)} %>%
  ggplotly(tooltip = "text")